1 Load libraries

library(tidyverse)          
library(clusterProfiler)    
library(enrichplot)         
library(org.Hs.eg.db) 
library(Cairo) 
library(VennDiagram)       
library(grid)               
library(simplifyEnrichment)

2 Define function

# map_cluster_number: 
#   - x: enrichResult or compareClusterResult (from clusterProfiler)
#   - df: simplifyGO result data.frame with columns ID (term IDs) and Cluster_num
#   - comp: if TRUE, use x@compareClusterResult, else x@result
# Returns a data.frame of all enrich columns + term_size + Cluster_num
map_cluster_number <- function(x,
                               df,
                               comp = FALSE) {
  ## 1. Standardize the input DF’s column names
  colnames(df) <- c("ID", "Cluster_num")

  ## 2. Select the correct slot from the clusterProfiler object
  if (comp) {
    # from compareClusterResult
    results <- x@compareClusterResult
  } else {
    # from a single enrichResult
    results <- x@result
  }

  ## 3. Get term_size (the numerator of the "BgRatio" string)
  ##    e.g. "12/21273" → 12
  results$term_size <- as.numeric(
    sapply(strsplit(results$BgRatio, "/"),
           function(parts) as.numeric(parts[1]))
  )

  ## 4. Merge the enrichment results with the cluster assignments
  merged_results <- merge(
    results,
    df,
    by    = "ID",
    all.x = TRUE
  )

  ## 5. Return the processed data frame
  return(merged_results)
}


# Function to map Ensembl IDs to gene symbols for a single row
map_geneID_to_symbol <- function(geneID_str) {
  geneIDs <- unlist(strsplit(geneID_str, "/"))
  mapped_ids <- bitr(geneIDs, 
                     fromType = "ENSEMBL", 
                     toType = "SYMBOL", 
                     OrgDb = org.Hs.eg.db)
  
  # drop duplicates in ensembl column and keep first occurence
  mapped_ids <- mapped_ids[!duplicated(mapped_ids$ENSEMBL),]
  
  gene_symbols <- mapped_ids$SYMBOL
  return(paste(gene_symbols, collapse = "/"))
}


# Function to map Ensembl IDs to gene symbols for a single row
map_geneID_to_name <- function(geneIDs) {
  mapped_ids <- bitr(geneIDs, 
                     fromType = "ENSEMBL", 
                     toType = c("GENENAME", "SYMBOL"), 
                     OrgDb = org.Hs.eg.db)
  
  # drop duplicates in ensembl column and keep first occurence
  mapped_ids <- mapped_ids[!duplicated(mapped_ids$ENSEMBL),]
  
  return(mapped_ids)
}

3 Set input and output paths

# set input and output paths
in_path <- "/mnt/Data/Projects/Cilia/revision/Restricted/data/"
out_path <- "/mnt/Data/Projects/Cilia/revision/Restricted/analysis/GO_BP/"

4 Load data

# Load the data
df_all <- read.delim(paste0(in_path, "Restricted_files_combined_as_cytoscape_input.csv"), sep = "\t", header = TRUE, row.names = 1)
head(df_all)

5 Compare biological themes for different cell lines (all locations pooled)

6 Split data by cell line

# Identify columns that contain "num" in their names
num_columns <- grep("num", names(df_all), value = TRUE)

# Convert all other columns from string to logical
df_all <- df_all %>%
  mutate(across(-all_of(num_columns), ~ as.logical(.)))

# Perform filtering again
df_ASC52telo <- df_all %>% filter(ASC52telo == TRUE)
df_hTERT <- df_all %>% filter(hTERT_RPE1_serum_starved == TRUE)
df_RPTEC_TERT1 <- df_all %>% filter(RPTEC_TERT1 == TRUE)

# filter gene_id by cell line
gene_id_ASC52telo <- row.names(df_ASC52telo)
gene_id_hTERT <- row.names(df_hTERT)
gene_id_RPTEC_TERT1 <- row.names(df_RPTEC_TERT1)
# prepare input data
input_genes <- list(
  ASC52telo = gene_id_ASC52telo,
  hTERT_RPE1_serum_starved = gene_id_hTERT,
  RPTEC_TERT1 = gene_id_RPTEC_TERT1
)

6.1 GO BP enrichment analysis

# Perform the compareCluster analysis
comp <- compareCluster(geneCluster = input_genes,
                     fun = "enrichGO",
                     OrgDb = org.Hs.eg.db,
                     keyType       = 'ENSEMBL',
                     ont           = "BP",
                     pAdjustMethod = "BH",
                     pvalueCutoff  = 0.01,
                     qvalueCutoff  = 0.01)

6.1.1 Save results as csv file

dotplot(comp, showCategory = NULL) + ggtitle("GO BP Enrichment Comparison", subtitle = "qvalue < 0.01") +
  scale_y_discrete(labels = function(x) str_wrap(x, width = 100))

# save dotplot as svg file
ggsave(paste0(out_path, "Restricted_comparison_cell_lines_GO_BP_dotplot.svg"), plot = last_plot(), device = "svg", width = 14, height = 40)

6.1.2 Visualize overlap of terms

# extract results
results <- comp@compareClusterResult

# prepare input data
input_genes <- list(
  ASC52telo = gene_id_ASC52telo,
  hTERT_RPE1_serum_starved = gene_id_hTERT,
  RPTEC_TERT1 = gene_id_RPTEC_TERT1
)

# split by location
ASC52telo <- results[results$Cluster == "ASC52telo",]
hTERT_RPE1_serum_starved <- results[results$Cluster == "hTERT_RPE1_serum_starved",]
RPTEC_TERT1 <- results[results$Cluster == "RPTEC_TERT1",]

# Create a list of the four sets
go_lists <- list(
  ASC52telo = ASC52telo$ID,
  hTERT_RPE1_serum_starved = hTERT_RPE1_serum_starved$ID,
  RPTEC_TERT1 = RPTEC_TERT1$ID
)

# Plot the Venn diagram
venn.plot <- venn.diagram(
  x = go_lists,
  category.names = c("ASC52telo", "hTERT_RPE1_serum_starved", "RPTEC_TERT1"),
  filename = NULL,
  output = TRUE
)

grid.newpage()
grid.draw(venn.plot)

# Save the captured plot as an SVG file
svg(paste0(out_path, "Restricted_comparison_cell_lines_GO_BP_venn.svg"), width = 7, height = 6)
grid.draw(venn.plot)
dev.off()
## svg 
##   2

6.1.3 Filter for terms only enriched for one of the locations

 # get results as data frame
comp_results <- comp@compareClusterResult

# Step 2: Count occurrences
term_counts <- table(comp_results$ID)

# Step 3: Filter proteins that appear at least twice
terms_at_least_twice <- names(term_counts[term_counts >= 2])

# remove terms that are enriched in more than one cell line
unspecific_terms <- comp_results[comp_results$ID %in% terms_at_least_twice,]
specific_terms <- comp_results[!comp_results$ID %in% terms_at_least_twice,]

# create a copy of comp
comp_filtered <- comp

# update results in comp
comp_filtered@compareClusterResult <- specific_terms

6.1.4 Dot plot of uniquely enriched terms

# plot dotplot
dotplot(comp_filtered, showCategory = NULL) + ggtitle("GO BP Enrichment Comparison", subtitle = "qvalue < 0.01")  +
  scale_y_discrete(labels = function(x) str_wrap(x, width = 100))

# save dotplot as svg file
ggsave(paste0(out_path, "Restricted_comparison_cell_lines_GO_BP_dotplot_specific_terms_only.svg"), plot = last_plot(), device = "svg", width = 14, height = 50, limitsize = FALSE)

6.2 Clustering of enriched GO BP terms

# Subset for pc_tip
ASC52telo <- comp_filtered@compareClusterResult[
  comp_filtered@compareClusterResult$Cluster == "ASC52telo", 
]

# Subset for bb_tz
hTERT_RPE1_serum_starved <- comp_filtered@compareClusterResult[
  comp_filtered@compareClusterResult$Cluster == "hTERT_RPE1_serum_starved", 
]

# Subset for bb_tz
RPTEC_TERT1 <- comp_filtered@compareClusterResult[
  comp_filtered@compareClusterResult$Cluster == "RPTEC_TERT1", 
]

# Create new compareClusterResult objects for each subset
comp_filtered_ASC52telo <- comp_filtered
comp_filtered_hTERT_RPE1_serum_starved <- comp_filtered
comp_filtered_RPTEC_TERT1 <- comp_filtered

comp_filtered_ASC52telo@compareClusterResult <- ASC52telo
comp_filtered_hTERT_RPE1_serum_starved@compareClusterResult <- hTERT_RPE1_serum_starved
comp_filtered_RPTEC_TERT1@compareClusterResult <- RPTEC_TERT1

6.2.1 Cluster results - ASC52telo

go_id = comp_filtered_ASC52telo@compareClusterResult$ID
mat = GO_similarity(go_id, 
                    ont = 'BP', 
                    db = 'org.Hs.eg.db', 
                    measure = "Sim_Relevance_2006")

6.2.1.1 Plot cluster heatmap

# Capture the plot
heatmap_plot <- grid.grabExpr({
 df <- simplifyGO(mat,
             method = 'binary_cut',
             plot = TRUE,
             column_title = "GO BP terms only significant in ASC52telo",
             use_raster = FALSE,
             order_by_size = TRUE,
             fontsize_range = c(9, 18),
             max_words = 6,
             word_cloud_grob_param = list(col = 'black', 
                                          max_width = unit(200, "mm")))
})

# Save the captured plot as an SVG file
svg(paste0(out_path, "Restricted_comparison_cell_lines_cilia_only_combined_GO_BP_dotplot_specific_terms_only_ASC52telo_heatmap.svg"), width = 16, height = 8)
grid.draw(heatmap_plot)
dev.off()
## png 
##   2
grid.newpage()
grid.draw(heatmap_plot)

6.2.1.2 Process and save results

# add cluster number from GO term clustering
results <- map_cluster_number(comp_filtered_ASC52telo,
                              df = df,
                              comp = TRUE
)

# Apply the function to each row of the DataFrame
results$GeneSymbol <- sapply(results$geneID, map_geneID_to_symbol)

# save results as csv file
write.csv(results, file = paste0(out_path, "Restricted_comparison_cell_lines_cilia_only_combined_GO_BP_dotplot_specific_terms_only_ASC52telo_result.csv"))

6.2.2 Cluster results - hTERT_RPE1_serum_starved

go_id = comp_filtered_hTERT_RPE1_serum_starved@compareClusterResult$ID
mat = GO_similarity(go_id, 
                    ont = 'BP', 
                    db = 'org.Hs.eg.db', 
                    measure = "Sim_Relevance_2006")

6.2.2.1 Plot cluster heatmap

# Capture the plot
heatmap_plot <- grid.grabExpr({
 df <- simplifyGO(mat,
             method = 'binary_cut',
             plot = TRUE,
             column_title = "GO BP terms only significant in hTERT_RPE1_serum_starved",
             use_raster = FALSE,
             order_by_size = TRUE,
             fontsize_range = c(9, 18),
             max_words = 6,
             word_cloud_grob_param = list(col = 'black', 
                                          max_width = unit(200, "mm")))
})

# Save the captured plot as an SVG file
svg(paste0(out_path, "Restricted_comparison_cell_lines_cilia_only_combined_GO_BP_dotplot_specific_terms_only_hTERT_RPE1_serum_starved_heatmap.svg"), width = 16, height = 8)
grid.draw(heatmap_plot)
dev.off()
## png 
##   2
grid.newpage()
grid.draw(heatmap_plot)

6.2.2.2 Process and save results

# add cluster number from GO term clustering
results <- map_cluster_number(comp_filtered_hTERT_RPE1_serum_starved,
                              df = df,
                              comp = TRUE
)

# Apply the function to each row of the DataFrame
results$GeneSymbol <- sapply(results$geneID, map_geneID_to_symbol)

# save results as csv file
write.csv(results, file = paste0(out_path, "Restricted_comparison_cell_lines_cilia_only_combined_GO_BP_dotplot_specific_terms_only_hTERT_RPE1_serum_starved_result.csv"))

6.2.3 Cluster results - RPTEC_TERT1

go_id = comp_filtered_RPTEC_TERT1@compareClusterResult$ID
mat = GO_similarity(go_id, 
                    ont = 'BP', 
                    db = 'org.Hs.eg.db', 
                    measure = "Sim_Relevance_2006")

6.2.3.1 Plot cluster heatmap

# Capture the plot
heatmap_plot <- grid.grabExpr({
 df <- simplifyGO(mat,
             method = 'binary_cut',
             plot = TRUE,
             column_title = "GO BP terms only significant in RPTEC_TERT1",
             use_raster = FALSE,
             order_by_size = TRUE,
             fontsize_range = c(9, 18),
             max_words = 6,
             word_cloud_grob_param = list(col = 'black', 
                                          max_width = unit(200, "mm")))
})

# Save the captured plot as an SVG file
svg(paste0(out_path, "Restricted_comparison_cell_lines_cilia_only_combined_GO_BP_dotplot_specific_terms_only_RPTEC_TERT1_heatmap.svg"), width = 16, height = 8)
grid.draw(heatmap_plot)
dev.off()
## png 
##   2
grid.newpage()
grid.draw(heatmap_plot)

6.2.3.2 Process and save results

# add cluster number from GO term clustering
results <- map_cluster_number(comp_filtered_RPTEC_TERT1,
                              df = df,
                              comp = TRUE
)

# Apply the function to each row of the DataFrame
results$GeneSymbol <- sapply(results$geneID, map_geneID_to_symbol)

# save results as csv file
write.csv(results, file = paste0(out_path, "Restricted_comparison_cell_lines_cilia_only_combined_GO_BP_dotplot_specific_terms_only_RPTEC_TERT1_result.csv"))

6.2.4 Filter for terms enriched in all three cell lines

 # get results as data frame
comp_results <- comp@compareClusterResult

# Count occurrences
term_counts <- table(comp_results$ID)

# get terms in all three cell lines
terms_in_all <- names(term_counts[term_counts == 3])

# remove all unspecific terms
unspecific_terms <- comp_results %>% filter(ID %in% terms_in_all)

# create a copy of comp
comp_filtered <- comp

# update results in comp
comp_filtered@compareClusterResult <- unspecific_terms

6.2.5 Dot plot of uniquely enriched terms

# plot dotplot
dotplot(comp_filtered, showCategory = NULL) + ggtitle("GO BP Enrichment Comparison", subtitle = "qvalue < 0.01")  +
  scale_y_discrete(labels = function(x) str_wrap(x, width = 100))

# save dotplot as svg file
ggsave(paste0(out_path, "Restricted_comparison_celllines_GO_BP_dotplot_shared_terms_only.svg"), plot = last_plot(), device = "svg", width = 14, height = 50, limitsize = FALSE)

6.2.6 Cluster results - shared terms of all three cell lines

# Split by location
ASC52telo <- comp_filtered@compareClusterResult[comp_filtered@compareClusterResult$Cluster == "ASC52telo", ]
hTERT_RPE1_serum_starved <- comp_filtered@compareClusterResult[comp_filtered@compareClusterResult$Cluster == "hTERT_RPE1_serum_starved", ]
RPTEC_TERT1 <- comp_filtered@compareClusterResult[comp_filtered@compareClusterResult$Cluster == "RPTEC_TERT1", ]

# Create new compareClusterResult objects for each subset
comp_filtered_ASC52telo<- comp_filtered
comp_filtered_hTERT_RPE1_serum_starved <- comp_filtered
comp_filtered_RPTEC_TERT1 <- comp_filtered

comp_filtered_ASC52telo@compareClusterResult <- ASC52telo
comp_filtered_hTERT_RPE1_serum_starved@compareClusterResult <- hTERT_RPE1_serum_starved
comp_filtered_RPTEC_TERT1@compareClusterResult <- RPTEC_TERT1
go_id = comp_filtered_ASC52telo@compareClusterResult$ID
mat = GO_similarity(go_id, 
                    ont = 'BP', 
                    db = 'org.Hs.eg.db', 
                    measure = "Sim_Relevance_2006")

6.2.6.1 Plot cluster heatmap

# Capture the plot
heatmap_plot <- grid.grabExpr({
 df <- simplifyGO(mat,
             method = 'binary_cut',
             plot = TRUE,
             column_title = "GO BP terms only significant in all three cell lines",
             use_raster = FALSE,
             order_by_size = TRUE,
             fontsize_range = c(9, 18),
             max_words = 6,
             word_cloud_grob_param = list(col = 'black', 
                                          max_width = unit(200, "mm")))
})

# Save the captured plot as an SVG file
svg(paste0(out_path, "Restricted_comparison_cell_lines_cilia_only_combined_GO_BP_dotplot_terms_in_all_cell_lines_heatmap.svg"), width = 16, height = 8)
grid.draw(heatmap_plot)
dev.off()
## png 
##   2
grid.newpage()
grid.draw(heatmap_plot)

6.2.6.2 Process and save results

# add cluster number from GO term clustering
results <- map_cluster_number(comp_filtered_ASC52telo,
                              df = df,
                              comp = TRUE
)

# Apply the function to each row of the DataFrame
results$GeneSymbol <- sapply(results$geneID, map_geneID_to_symbol)

# save results as csv file
write.csv(results, file = paste0(out_path, "Restricted_comparison_cell_lines_cilia_only_combined_GO_BP_dotplot_terms_in_all_cell_lines_piechart_result_ASC52telo.csv"))
# add cluster number from GO term clustering
results <- map_cluster_number(comp_filtered_hTERT_RPE1_serum_starved,
                              df = df,
                              comp = TRUE
)

# Apply the function to each row of the DataFrame
results$GeneSymbol <- sapply(results$geneID, map_geneID_to_symbol)

# save results as csv file
write.csv(results, file = paste0(out_path, "Restricted_comparison_cell_lines_cilia_only_combined_GO_BP_dotplot_terms_in_all_cell_lines_piechart_result_hTERT_RPE1.csv"))
# add cluster number from GO term clustering
results <- map_cluster_number(comp_filtered_RPTEC_TERT1,
                              df = df,
                              comp = TRUE
)

# Apply the function to each row of the DataFrame
results$GeneSymbol <- sapply(results$geneID, map_geneID_to_symbol)

# save results as csv file
write.csv(results, file = paste0(out_path, "Restricted_comparison_cell_lines_cilia_only_combined_GO_BP_dotplot_terms_in_all_cell_lines_piechart_result_RPTEC_TERT1.csv"))

6.2.7 Filter for terms shared between hTERT_RPE1_serum_starved and RPTEC_TERT1

 # get results as data frame
comp_results <- comp@compareClusterResult

# Count occurrences
term_counts <- table(comp_results$ID)

# get terms in all three cell lines
terms_in_all <- names(term_counts[term_counts == 3])

# Remove terms of ASC52telo
comp_results <- comp_results %>% filter(Cluster != "ASC52telo")

# Count occurrences
term_counts <- table(comp_results$ID)

# Filter proteins that appear at least twice
terms_at_least_twice <- names(term_counts[term_counts >= 2])

# remove terms that are enriched in all cell lines from terms_at_least_twice
terms_at_least_twice <- terms_at_least_twice[!terms_at_least_twice %in% terms_in_all]

# remove terms that are enriched in more than one cell line
unspecific_terms <- comp_results[comp_results$ID %in% terms_at_least_twice,]
specific_terms <- comp_results[!comp_results$ID %in% terms_at_least_twice,]

# create a copy of comp
comp_filtered <- comp

# update results in comp
comp_filtered@compareClusterResult <- unspecific_terms

comp_filtered_RPTEC_TERT1 <- comp_filtered
comp_filtered_hTERT_RPE1 <- comp_filtered

comp_filtered_hTERT_RPE1@compareClusterResult <- comp_filtered@compareClusterResult[comp_filtered@compareClusterResult$Cluster == "hTERT_RPE1_serum_starved",]
comp_filtered_RPTEC_TERT1@compareClusterResult <- comp_filtered@compareClusterResult[comp_filtered@compareClusterResult$Cluster == "RPTEC_TERT1",]

6.2.8 Dot plot of uniquely enriched terms

# plot dotplot
dotplot(comp_filtered, showCategory = NULL) + ggtitle("GO BP Enrichment Comparison for terms shared between hTERT_RPE1_serum_starved and RPTEC_TERT1", subtitle = "qvalue < 0.01")  +
  scale_y_discrete(labels = function(x) str_wrap(x, width = 100))

# save dotplot as svg file
ggsave(paste0(out_path, "Restricted_comparison_celllines_GO_BP_dotplot_shared_hTERT_RPTEC.svg"), plot = last_plot(), device = "svg", width = 14, height = 13, limitsize = FALSE)

6.2.9 Cluster results - shared between hTERT_RPE1_serum_starved and RPTEC_TERT1

go_id = comp_filtered_hTERT_RPE1@compareClusterResult$ID
mat = GO_similarity(go_id, 
                    ont = 'BP', 
                    db = 'org.Hs.eg.db', 
                    measure = "Sim_Relevance_2006")

6.2.9.1 Plot cluster heatmap

# Capture the plot
heatmap_plot <- grid.grabExpr({
 df <- simplifyGO(mat,
             method = 'binary_cut',
             plot = TRUE,
             column_title = "GO BP terms only significant shared between hTERT_RPE1_serum_starved and RPTEC_TERT1",
             use_raster = FALSE, 
             order_by_size = TRUE,
             fontsize_range = c(9, 18),
             max_words = 6,
             word_cloud_grob_param = list(col = 'black', 
                                          max_width = unit(200, "mm")))
})

# Save the captured plot as an SVG file
svg(paste0(out_path, "Restricted_comparison_cell_lines_GO_BP_dotplot_shared_hTERT_RPTEC_heatmap.svg"), width = 16, height = 8)
grid.draw(heatmap_plot)
dev.off()
## png 
##   2
grid.newpage()
grid.draw(heatmap_plot)

6.2.9.2 Process and save results

# add cluster number from GO term clustering
results <- map_cluster_number(comp_filtered_hTERT_RPE1,
                              df = df,
                              comp = TRUE
)

# Apply the function to each row of the DataFrame
results$GeneSymbol <- sapply(results$geneID, map_geneID_to_symbol)

# save results as csv file
write.csv(results, file = paste0(out_path, "Restricted_comparison_cell_lines_GO_BP_dotplot_shared_hTERT_RPTEC_results_comp_hTERT.csv"))
# add cluster number from GO term clustering
results <- map_cluster_number(comp_filtered_RPTEC_TERT1,
                              df = df,
                              comp = TRUE
)

# Apply the function to each row of the DataFrame
results$GeneSymbol <- sapply(results$geneID, map_geneID_to_symbol)

# save results as csv file
write.csv(results, file = paste0(out_path, "Restricted_comparison_cell_lines_GO_BP_dotplot_shared_hTERT_RPTEC_results_RPTEC.csv"))

6.2.10 Filter for terms shared between ASC52telo and RPTEC_TERT1

 # get results as data frame
comp_results <- comp@compareClusterResult

# Count occurrences
term_counts <- table(comp_results$ID)

# get terms in all three cell lines
terms_in_all <- names(term_counts[term_counts == 3])

# Remove terms of hTERT_RPE1_serum_starved
comp_results <- comp_results %>% filter(Cluster != "hTERT_RPE1_serum_starved")

# Count occurrences
term_counts <- table(comp_results$ID)

# Filter proteins that appear at least twice
terms_at_least_twice <- names(term_counts[term_counts >= 2])

# remove terms that are enriched in all cell lines from terms_at_least_twice
terms_at_least_twice <- terms_at_least_twice[!terms_at_least_twice %in% terms_in_all]

# remove terms that are enriched in more than one cell line
unspecific_terms <- comp_results[comp_results$ID %in% terms_at_least_twice,]
specific_terms <- comp_results[!comp_results$ID %in% terms_at_least_twice,]

# create a copy of comp
comp_filtered <- comp

# update results in comp
comp_filtered@compareClusterResult <- unspecific_terms

comp_filtered_ASC52telo <- comp_filtered
comp_filtered_RPTEC_TERT1 <- comp_filtered

comp_filtered_ASC52telo@compareClusterResult <- comp_filtered@compareClusterResult[comp_filtered@compareClusterResult$Cluster == "ASC52telo",]
comp_filtered_RPTEC_TERT1@compareClusterResult <- comp_filtered@compareClusterResult[comp_filtered@compareClusterResult$Cluster == "RPTEC_TERT1",]

6.2.11 Dot plot of uniquely enriched terms

# plot dotplot
dotplot(comp_filtered, showCategory = NULL) + ggtitle("GO BP Enrichment Comparison of terms shared between ASC52telo and RPTEC_TERT1", subtitle = "qvalue < 0.01")  +
  scale_y_discrete(labels = function(x) str_wrap(x, width = 100))

# save dotplot as svg file
ggsave(paste0(out_path, "Restricted_comparison_celllines_GO_BP_dotplot_shared_ASC52telo_RPTEC.svg"), plot = last_plot(), device = "svg", width = 14, height = 4, limitsize = FALSE)

6.2.12 Cluster results - shared between hTERT_RPE1_serum_starved and RPTEC_TERT1

go_id = comp_filtered_ASC52telo@compareClusterResult$ID
mat = GO_similarity(go_id, 
                    ont = 'BP', 
                    db = 'org.Hs.eg.db', 
                    measure = "Sim_Relevance_2006")

6.2.12.1 Plot cluster heatmap

# Capture the plot
heatmap_plot <- grid.grabExpr({
 df <- simplifyGO(mat,
             method = 'binary_cut',
             plot = TRUE,
             column_title = "GO BP terms only significant shared between ASC52telo and RPTEC_TERT1",
             use_raster = FALSE,
             order_by_size = TRUE,
             fontsize_range = c(9, 18),
             max_words = 6,
             word_cloud_grob_param = list(col = 'black', 
                                          max_width = unit(200, "mm")))
})

# Save the captured plot as an SVG file
svg(paste0(out_path, "Restricted_comparison_cell_lines_GO_BP_dotplot_shared_ASC52telo_RPTEC_heatmap.svg"), width = 16, height = 8)
grid.draw(heatmap_plot)
dev.off()
## png 
##   2
grid.newpage()
grid.draw(heatmap_plot)

6.2.12.2 Process and save results

# add cluster number from GO term clustering
results <- map_cluster_number(comp_filtered_ASC52telo,
                              df = df,
                              comp = TRUE
)

# Apply the function to each row of the DataFrame
results$GeneSymbol <- sapply(results$geneID, map_geneID_to_symbol)

# save results as csv file
write.csv(results, file = paste0(out_path, "Restricted_comparison_cell_lines_GO_BP_dotplot_shared_ASC52telo_RPTEC_results_ASC52telo.csv"))
# add cluster number from GO term clustering
results <- map_cluster_number(comp_filtered_RPTEC_TERT1,
                              df = df,
                              comp = TRUE
)

# Apply the function to each row of the DataFrame
results$GeneSymbol <- sapply(results$geneID, map_geneID_to_symbol)

# save results as csv file
write.csv(results, file = paste0(out_path, "Restricted_comparison_cell_lines_GO_BP_dotplot_shared_ASC52telo_RPTEC_results_RPTEC.csv"))

6.2.13 Filter for terms shared between ASC52telo and hTERT_RPE1_serum_starved

 # get results as data frame
comp_results <- comp@compareClusterResult

# Count occurrences
term_counts <- table(comp_results$ID)

# get terms in all three cell lines
terms_in_all <- names(term_counts[term_counts == 3])

# Remove terms of hTERT_RPE1_serum_starved
comp_results <- comp_results %>% filter(Cluster != "RPTEC_TERT1")

# Count occurrences
term_counts <- table(comp_results$ID)

# Filter proteins that appear at least twice
terms_at_least_twice <- names(term_counts[term_counts >= 2])

# remove terms that are enriched in all cell lines from terms_at_least_twice
terms_at_least_twice <- terms_at_least_twice[!terms_at_least_twice %in% terms_in_all]

# remove terms that are enriched in more than one cell line
unspecific_terms <- comp_results[comp_results$ID %in% terms_at_least_twice,]
specific_terms <- comp_results[!comp_results$ID %in% terms_at_least_twice,]

# create a copy of comp
comp_filtered <- comp

# update results in comp
comp_filtered@compareClusterResult <- unspecific_terms

comp_filtered_ASC52telo <- comp_filtered
comp_filtered_hTERT_RPE1 <- comp_filtered

comp_filtered_ASC52telo@compareClusterResult <- comp_filtered@compareClusterResult[comp_filtered@compareClusterResult$Cluster == "ASC52telo",]
comp_filtered_hTERT_RPE1@compareClusterResult <- comp_filtered@compareClusterResult[comp_filtered@compareClusterResult$Cluster == "hTERT_RPE1_serum_starved",]

6.2.14 Dot plot of uniquely enriched terms

# plot dotplot
dotplot(comp_filtered, showCategory = NULL) + ggtitle("GO BP Enrichment Comparison of terms shared between ASC52telo and hTERT_RPE1_serum_starved", subtitle = "qvalue < 0.01")  +
  scale_y_discrete(labels = function(x) str_wrap(x, width = 100))

# save dotplot as svg file
ggsave(paste0(out_path, "Restricted_comparison_celllines_GO_BP_dotplot_shared_ASC52telo_hTERT.svg"), plot = last_plot(), device = "svg", width = 14, height = 5, limitsize = FALSE)

6.2.15 Cluster results - shared between hTERT_RPE1_serum_starved and RPTEC_TERT1

go_id = comp_filtered_ASC52telo@compareClusterResult$ID
mat = GO_similarity(go_id, 
                    ont = 'BP', 
                    db = 'org.Hs.eg.db', 
                    measure = "Sim_Relevance_2006")

6.2.15.1 Plot cluster heatmap

# Capture the plot
heatmap_plot <- grid.grabExpr({
 df <- simplifyGO(mat,
             method = 'binary_cut',
             plot = TRUE,
             column_title = "GO BP terms only significant shared between ASC52telo and hTERT_RPE1_serum_starved",
             use_raster = FALSE,
             order_by_size = TRUE,
             fontsize_range = c(9, 18),
             max_words = 6,
             word_cloud_grob_param = list(col = 'black', 
                                          max_width = unit(200, "mm")))
})

# Save the captured plot as an SVG file
svg(paste0(out_path, "Restricted_comparison_cell_lines_GO_BP_dotplot_shared_ASC52telo_hTERT_heatmap.svg"), width = 16, height = 8)
grid.draw(heatmap_plot)
dev.off()
## png 
##   2
grid.newpage()
grid.draw(heatmap_plot)

6.2.15.2 Process and save results

# add cluster number from GO term clustering
results <- map_cluster_number(comp_filtered_ASC52telo,
                              df = df,
                              comp = TRUE
)

# Apply the function to each row of the DataFrame
results$GeneSymbol <- sapply(results$geneID, map_geneID_to_symbol)

# save results as csv file
write.csv(results, file = paste0(out_path, "Restricted_comparison_cell_lines_GO_BP_dotplot_shared_ASC52telo_hTERT_results_ASC52telo.csv"))
# add cluster number from GO term clustering
results <- map_cluster_number(comp_filtered_hTERT_RPE1,
                              df = df,
                              comp = TRUE
)

# Apply the function to each row of the DataFrame
results$GeneSymbol <- sapply(results$geneID, map_geneID_to_symbol)

# save results as csv file
write.csv(results, file = paste0(out_path, "Restricted_comparison_cell_lines_GO_BP_dotplot_shared_ASC52telo_hTERT_results_hTERT.csv"))